“Files Used:”

Z:/COVID-19_WastewaterAnalysis/data/processed/MMSD_Interceptor_Cases_2_7_22.csv

Z:/COVID-19_WastewaterAnalysis/data/processed/LIMSWasteData_02-09-22.csv

MadDF <- filter(FullDF,Site=="Madison")%>%
    mutate(FlagedOutliers = IdentifyOutliers(N1,Bin = 21, Action = "Flag"),
           #Manual flagging that method misses due to boundary effect with binning
           FlagedOutliers = ifelse(Date == mdy("01/26/2021") | Date == mdy("01/27/2021"),
                                   TRUE, FlagedOutliers),
           NoOutlierVar = ifelse(FlagedOutliers, NA, N1))

KeyOulierPoints <- c(ymd("2021-06-08"),
                     ymd("2021-10-17"),
                     ymd("2021-05-02"),
                     ymd("2021-01-26"))

NonOuliersDF <- MadDF%>%
  mutate(Outlier = ifelse(FlagedOutliers,N1,NA))%>%
           mutate(N1 = NoOutlierVar)%>%
            filter(!(is.na(N1)))

OutLierPlotDF <- MadDF%>%
  mutate(Outlier = ifelse(FlagedOutliers,N1,NA))%>%
           mutate(N1 = NoOutlierVar)%>%
  filter(!(is.na(N1)&is.na(Outlier)))%>%
  ggplot(aes(x=Date))+#Data depends on time 
  geom_point(aes(y=N1,
                color="N1", 
                info = N1),data=NonOuliersDF,size=.5)+#compares N1
  geom_point(aes(y=Outlier,
                 color="N1 Outlier",
                 info = Outlier))+
  theme_light() +
  #scale_y_log10()+
  scale_color_manual(values=c("#F8766D","#999999"))

ggplotly(OutLierPlotDF)
#"2021-06-08","2021-10-17","2021-05-02","2021-01-26"
SizeUsed = 1.5
alphaUsed = .9
N1ShapeUnit = 4
N2ShapeUnit = 5

IntercepterDF <- FullDF %>%
  mutate(Pop = case_when(
    Site=="MMSD-P2" ~ 111967,
    Site=="MMSD-P7" ~ 81977,
    Site=="MMSD-P8" ~ 127634,
    Site=="MMSD-P11" ~ 130799,
    Site=="MMSD-P18" ~ 151470,
    Site=="Madison" ~ 603847,
  ))%>%
  group_by(Site)%>%
  mutate(FudgeFactor = mean(N1))%>%#Mean of sites to see if it works as normalizer
  filter(Site != "Madison") #We are looking for agreement between the interceptors
                            #after some normalization so Madison just distracts




IntercepterOverLay <- IntercepterDF%>%
  ggplot(aes(x=Date))+
  geom_point(aes(y=N1,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N1ShapeUnit)+
  geom_point(aes(y=N2,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N2ShapeUnit)+
  theme_light() +
  scale_y_log10()

ggplotly(IntercepterOverLay)
IntercepterOverLayFlow <- IntercepterDF%>%
  ggplot(aes(x=Date))+
  geom_point(aes(y=N1*FlowRate,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N1ShapeUnit)+
  geom_point(aes(y=N2*FlowRate,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N2ShapeUnit)+
  theme_light() +
  scale_y_log10()

ggplotly(IntercepterOverLayFlow)
IntercepterOverLayPop <- IntercepterDF%>%
  ggplot(aes(x=Date))+
  geom_point(aes(y=N1/Pop,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N1ShapeUnit)+
  geom_point(aes(y=N2/Pop, color = Site),size = SizeUsed, alpha= alphaUsed,shape=N2ShapeUnit)+
  theme_light() +
  scale_y_log10()

ggplotly(IntercepterOverLayPop)
IntercepterDF%>%
  ggplot()+
  geom_histogram(aes(x=log(N1)))+
  facet_wrap(~Site)

IntercepterDF%>%
  group_by(Date)%>%
  mutate(Ranking = rank(N1,ties.method = "random"))%>%
  group_by(Site)%>%
  summarize(N1AverageRanking = mean(Ranking, na.rm = TRUE))
## # A tibble: 5 x 2
##   Site     N1AverageRanking
##   <chr>               <dbl>
## 1 MMSD-P11             1.42
## 2 MMSD-P18             2.41
## 3 MMSD-P2              3.00
## 4 MMSD-P7              3.75
## 5 MMSD-P8              4.37
IntercepterDF%>%
  group_by(Date)%>%
  mutate(Ranking = rank(N1*FlowRate,ties.method = "random"))%>%
  group_by(Site)%>%
  summarize(N1FlowAverageRanking = mean(Ranking, na.rm = TRUE))
## # A tibble: 5 x 2
##   Site     N1FlowAverageRanking
##   <chr>                   <dbl>
## 1 MMSD-P11                 1.51
## 2 MMSD-P18                 2.55
## 3 MMSD-P2                  2.96
## 4 MMSD-P7                  3.58
## 5 MMSD-P8                  4.36
IntercepterDF%>%
  group_by(Date)%>%
  mutate(Ranking = rank(N1/Pop,ties.method = "random"))%>%
  group_by(Site)%>%
  summarize(N1PopAverageRanking = mean(Ranking, na.rm = TRUE))
## # A tibble: 5 x 2
##   Site     N1PopAverageRanking
##   <chr>                  <dbl>
## 1 MMSD-P11                1.37
## 2 MMSD-P18                2.31
## 3 MMSD-P2                 3.05
## 4 MMSD-P7                 3.92
## 5 MMSD-P8                 4.32
length(IntercepterDF$Date)
## [1] 2536
IntercepterDF%>%
  group_by(Date)%>%
  mutate(Ranking = frank(N1,ties.method = "random"))%>%
  group_by(Site,Ranking)%>%
  summarize(SiteNumber = n())%>%
  ggplot(aes(x=Ranking))+
  geom_point(aes(y=SiteNumber,color=Site))

IntercepterChangeDF <- IntercepterDF%>%
  filter(!is.na(N1))%>%
  mutate(ChangeN1 = lead(N1) - N1,
         ChangeN2 = lead(N2) - N2,
         PerChangeN1 = 200*(lead(N1) - N1)/(N1+lead(N1)),
         PerChangeN2 = 200*(lead(N2) - N2)/(N2+lead(N2)))


IntercepterChangeOverLay <- IntercepterChangeDF%>%
  ggplot(aes(x=Date))+
  geom_point(aes(y = ChangeN1, color = Site), size = SizeUsed,
             alpha = alphaUsed, shape=N1ShapeUnit)+
  geom_point(aes(y = ChangeN2,color = Site), size = SizeUsed,
             alpha = alphaUsed, shape = N2ShapeUnit)+
  theme_light()#+
  #scale_y_log10()

ggplotly(IntercepterChangeOverLay)
IntercepterPerChangeOverLay <- IntercepterChangeDF%>%
  ggplot(aes(x = Date))+
  geom_point(aes(y = PerChangeN1, color = Site), size = SizeUsed,
             alpha = alphaUsed, shape = N1ShapeUnit)+
  geom_point(aes(y = PerChangeN2, color = Site), size = SizeUsed,
             alpha= alphaUsed,shape = N2ShapeUnit)+
  theme_light()#+
  #scale_y_log10()

ggplotly(IntercepterPerChangeOverLay)
LoessFunc <- function(SiteFilter,DF,SpanConstant = .163){
  MainDF <- DF%>%
    filter(Site==SiteFilter)
  MainDF[["loessN1"]] <- loessFit(y=(MainDF[["N1"]]), 
                      x=MainDF$Date, #create loess fit of the data
                      span=SpanConstant, #span of .2 seems to give the best result, not rigorously chosen
                      iterations=2)$fitted#2 iterations remove some bad patterns
return(MainDF)
}

SiteLoessDF <- lapply(c("MMSD-P11","MMSD-P18","MMSD-P2","MMSD-P7","MMSD-P8"),
                      LoessFunc,IntercepterDF,SpanConstant=.2)%>%
                bind_rows()

A <- SiteLoessDF%>%
  filter(!is.na(loessN1))%>%
  ggplot(aes(x=Date))+
  geom_point(aes(y=N1,color=Site),data=IntercepterDF,size=.5,alpha=.5)+
  geom_line(aes(y=loessN1,color=Site))+
  scale_y_log10()
ggplotly(A)
SevenDayMAFunc <- function(SiteFilter,DF){
  MainDF <- DF%>%
    filter(Site==SiteFilter)
  MainDF[["SLDCases"]] <- rollapply(data = MainDF$FirstConfirmed, width = 7, FUN = mean, 
                            na.rm = TRUE,fill=NA)
return(MainDF)
}

SiteLoessDF <- lapply(c("MMSD-P11","MMSD-P18","MMSD-P2","MMSD-P7","MMSD-P8"),
                      SevenDayMAFunc,IntercepterDF)%>%
                bind_rows()

A <- SiteLoessDF%>%
  filter(!is.na(SLDCases))%>%
  ggplot(aes(x=Date))+
  geom_point(aes(y=FirstConfirmed,color=Site,shape=Site),data=IntercepterDF,size=.5,alpha=.5)+
  geom_line(aes(y=SLDCases,color=Site))+
  scale_y_log10()
ggplotly(A)